###############################################
#####  Replication for data visualization  ####
### Party institutionalization and pretest  ###
#####            July 1, 2024             #####
#####           M. Rosemary Pang          #####
###############################################

### Set working directory and load packages ###
setwd('/Users/mpang/Documents/RESEARCH/PartyInstitution/replication file')
library(ggplot2)
library(haven)
library(data.table)
library(reshape2)
library(xlsx)
library(gridExtra)
library(dplyr)


### Figure 1 Comparing different measure of corruption in Zambia ###
Corruption <- read_dta("corruption_rescaled.dta")
Zambia <- Corruption[which(Corruption$gwf_caseid==279),]
ggplot(Zambia, aes(year))+
  geom_line(aes(y=VDem, colour="V-Dem"))+
  geom_line(aes(y=CPI2, colour="Transparency International"))+
  geom_line(aes(y=CCE2, colour="World Bank"))+
  labs(title="Different Measures of Corruption in Zambia", y="Corruption")

### Figure 2 ICC of the latent variable ###
pidata <- read.csv("IRTdiscrim.csv")

pidata_sub <- pidata[,c(3:9)]
pireshape <- melt(pidata_sub, id=c("pi"))

#subsetting for binary variables in IRT model
bipi <- pireshape[1:12582,]


#subsetting for continuous variables in IRT model, normalize probability
contipi <- pireshape[12583:25164,]

ggplot(bipi) +
  geom_line(aes(x=pi, y=value, colour=variable)) +
  scale_colour_manual(values=c("red","green","blue"))+
  ylim(0,1)+
  labs(x="party institutionalization",y="predicted discrimination",title="Item Characteristic Curve")

ggplot(contipi) +
  geom_line(aes(x=pi, y=value, colour=variable)) +
  scale_colour_manual(values=c("orange","brown","black"))+
  labs(x="party institutionalization",y="predicted discrimination",title="Item Characteristic Curve")


### Figure 2 Histogram of party institutionalization ###
hist(pidata$pi, 
     ylim = c(0,8),
     col = "white",
     breaks = seq(0,1,by=0.04),freq = FALSE,
     main='Distribution of Ruling Party Institutionalization',
     xlab="Party Institutionalization",
     cex.main=0.9,cex.lab=0.7)

#### Party institutionalization change within country ####
## Figure A1 in Appendix ##
Fulldata <- read_dta("DatawithPI.dta")

countries <- Fulldata %>%
  filter(cowcode %in% c(850, 70)) %>%
  select(year, gwf_country, pi, partylocal,
         partyhistory, heirparty, v2palocoff,
         v2paactcom, v2pasoctie, supportparty)

ggplot(countries, aes(x = year, y = pi, color = gwf_country)) +
  geom_line(size = 1) +
  labs(
    title = "Time Trend of Ruling Party Institutionalization",
    x = "Year",
    y = "Ruling Party Institutionalization",
    color = "Country"
  ) +
  theme_minimal()


#### Party institutionalization and VDem graph ####
## Figure A2 in Appendix ##
hist(Fulldata[Fulldata$pi==0&Fulldata$supportparty==0,]$v2xps_party, 
     breaks = seq(0,1,by=0.04),freq = FALSE,
     col = "white",
     main='Party system institutionalization when No ruling party',
     xlab="VDem party institutionalization",
     cex.main=0.9,cex.lab=0.7)

## Figure A3 in Appendix ##
p <- ggplot(Fulldata,aes(y=v2xps_party, x=pi)) + geom_point(color="grey") +
  stat_smooth(method = 'lm', formula = y ~ poly(x,2), se= FALSE) +
  labs(title="Autocratic party systems and Ruling party institutionalization",
       x=expression(italic("Ruling party")~"institutionalization"),y=expression("VDem party"~italic("system") ~ "institutionalization"))
p


## Figure A4 in Appendix ##
hist(Fulldata$mean5,
     col = "white",
     xlab="Latent Measure of Protest",main="Histogram with Density Curve",prob="TRUE")
lines(density(Fulldata$mean5,na.rm = TRUE),col="red")


##### Figure 3: Marginal effect#####
marginal <- read.csv("marginal.csv")
ggplot(marginal, aes(x=pi))+
  geom_ribbon(data=marginal, aes(ymin=lwr,ymax=upr),fill="grey80")+
  geom_line(data=marginal,aes(y=fit))+
  geom_vline(xintercept = 0.22, colour = gray(1/2), lty = 2)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 1)+
  labs(y="Marginal Effect of Corruption on Protest",x="Party Institutionalization")



##### Figure B1 in Appendix B-1, other covariates #####
### Corruptionct
corruption <- read.csv("DVcorruption.csv")

interval1 <- qnorm((1-0.9)/2)
interval2 <- qnorm((1-0.95)/2)

p1 <- ggplot(corruption)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_hline(yintercept=0.052,colour="red",linetype=2)+
  geom_linerange(aes(x = term, ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = term, y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  shape = 21, fill = "WHITE")+
  coord_flip()+
  ylab("Coefficient Estimate: Corruption") + xlab("")

### Interaction
interaction <- read.csv('DVinteraction.csv')

interval1 <- qnorm((1-0.9)/2)
interval2 <- qnorm((1-0.95)/2)

p2 <- ggplot(interaction)+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_hline(yintercept=-0.073,colour="red",linetype=2)+
  geom_linerange(aes(x = term, ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = term, y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  shape = 21, fill = "WHITE")+
  coord_flip()+
  ylab("Coefficient Estimate: Interaction") + xlab("")

grid.arrange(p1,p2,ncol=2)


###### Figure B2 in Appendix B-2: Alternative Unit Effect Models #####
uniteffect <- read.csv("uniteffect.csv")
uniteffect <- uniteffect[which(uniteffect$term!="(Intercept)"),]

interval1 <- qnorm((1-0.9)/2)
interval2 <- qnorm((1-0.95)/2)

ggplot(uniteffect,aes(colour=model))+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_linerange(aes(x = term, ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = term, y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  shape = 21, fatten=1.5, fill = "WHITE")+
  coord_flip()+theme_bw()+
  ylab("Coefficient Estimate") + xlab("")


##### Figure B3 in Appendix B-3: Support & Inherit party ######
support <- read.csv("supportparty.csv")
support <- support[which(support$term!="(Intercept)"),]

interval1 <- qnorm((1-0.9)/2)
interval2 <- qnorm((1-0.95)/2)

ggplot(support,aes(colour=model))+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_linerange(aes(x = term, ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = term, y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  shape = 21, fatten=1.5, fill = "WHITE")+
  coord_flip()+theme_bw()+
  ylab("Coefficient Estimate") + xlab("")


##### Figure B4 in Appendix B-4:Time trend #####
time <- read.csv("time.csv")
time <- time[which(time$term!="(Intercept)"),]

interval1 <- qnorm((1-0.9)/2)
interval2 <- qnorm((1-0.95)/2)

ggplot(time,aes(colour=model))+
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)+
  geom_linerange(aes(x = term, ymin = estimate - std.error*interval1,
                     ymax = estimate + std.error*interval1),
                 lwd = 1, position = position_dodge(width = 1/2))+
  geom_pointrange(aes(x = term, y = estimate, ymin = estimate - std.error*interval2,
                      ymax = estimate + std.error*interval2),
                  lwd = 1/2, position = position_dodge(width = 1/2),
                  shape = 21, fatten=1.5, fill = "WHITE")+
  coord_flip()+theme_bw()+
  ylab("Coefficient Estimate") + xlab("")



##### Figures in Appendix C #####
## Figure C1: regress on time ##
regression <- read.csv("regression.csv")

low <- regression[which(regression$pi<=.2057842),]
median <- regression[which(regression$pi>.2057842|regression$pi<.6327516),]
high <- regression[which(regression$pi>=.6327516),]
corruption.res1 <- resid(lm(corruptionct~time+ld,data=low))
protest.res1 <- resid(lm(mean5~time+ld,data=low))
data1 <- as.data.frame(cbind(corruption.res1,protest.res1))
p1 <- ggplot(data=data1,aes(x=corruption.res1,y=protest.res1))+
  geom_point(color="gray60")+
  geom_smooth(method='lm',formula = y ~ poly(x,1), se= FALSE,data=data1,color="blue")+
  labs(x="Corruption",y="Protest",title='Institutionalization [0,0.21]')

corruption.res2 <- resid(lm(corruptionct~time+ld,data=median))
protest.res2 <- resid(lm(mean5~time+ld,data=median))
data2 <- as.data.frame(cbind(corruption.res2,protest.res2))
p2 <- ggplot(data=data2,aes(x=corruption.res2,y=protest.res2))+
  geom_point(color="gray60")+
  geom_smooth(method='lm',formula = y ~ poly(x,1), se= FALSE,data=data2,color="blue")+
  labs(x="Corruption",y="Protest",title='Institutionalization (0.21,0.63)')


corruption.res3 <- resid(lm(corruptionct~time+ld,data=high))
protest.res3 <- resid(lm(mean5~time+ld,data=high))
data3 <- as.data.frame(cbind(corruption.res3,protest.res3))
p3 <- ggplot(data=data3,aes(x=corruption.res3,y=protest.res3))+
  geom_point(color="gray60")+
  geom_smooth(method='lm',formula = y ~ poly(x,1), se= FALSE,data=data3,color="blue")+
  labs(x="Corruption",y="Protest",title='Institutionalization [0.63,1]')

grid.arrange(p1,p2,p3,ncol=3)

## Figure C2: kernel graph ##
kernel <- read.csv("marginal.csv")
pMain <- ggplot(regression, aes(x=pi))+
  geom_ribbon(data=kernel, aes(ymin=lwr,ymax=upr),fill="grey80")+
  geom_line(data=kernel,aes(y=fit))+
  labs(y="Marginal Effect of Corruption on Protest",x="")
plow <- ggplot(data=regression,aes(x=pi))+
  geom_histogram(aes(y=..density..),
                 breaks=seq(0,1,by=0.05),
                 col="black",fill="white")+
  labs(y="density",x="Party Institutionalization")
grid.arrange(pMain,plow,nrow=2,heights=c(3,1))
